diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index e0a3904bc0c3..86cc7c22b530 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -70,7 +70,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return cir::ConstantOp::create(*this, loc, attr); } - // Creates constant null value for integral type ty. + // Creates constant null value for the given type ty. cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc) { return cir::ConstantOp::create(*this, loc, getZeroInitAttr(ty)); } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 1275301d5c92..c9e5996b9360 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -994,7 +994,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { /// Create a unary shuffle. The second vector operand of the IR instruction /// is poison. return createVecShuffle( - loc, vec1, getConstant(loc, cir::PoisonAttr::get(vec1.getType())), + loc, vec1, getConstant(loc, getAttr(vec1.getType())), mask); } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 4b0f799737e0..2b14a2b8d1b9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -201,6 +201,85 @@ static mlir::Value emitX86PSLLDQIByteShift(CIRGenFunction &cgf, return builder.createBitcast(shuffleResult, resultType); } +static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf, + mlir::Value cmp, unsigned numElts, + mlir::Value maskIn, + mlir::Location loc) { + if (maskIn) { + llvm_unreachable("NYI"); + } + if (numElts < 8) { + int64_t indices[8]; + for (unsigned i = 0; i != numElts; ++i) + indices[i] = i; + for (unsigned i = numElts; i != 8; ++i) + indices[i] = i % numElts + numElts; + + // This should shuffle between cmp (first vector) and null (second vector) + mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc); + cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices); + } + return cgf.getBuilder().createBitcast( + cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U))); +} + +static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc, + bool isSigned, + ArrayRef ops, + mlir::Location loc) { + assert((ops.size() == 2 || ops.size() == 4) && + "Unexpected number of arguments"); + unsigned numElts = cast(ops[0].getType()).getSize(); + mlir::Value cmp; + + if (cc == 3) { + llvm_unreachable("NYI"); + } else if (cc == 7) { + llvm_unreachable("NYI"); + } else { + cir::CmpOpKind pred; + switch (cc) { + default: + llvm_unreachable("Unknown condition code"); + case 0: + pred = cir::CmpOpKind::eq; + break; + case 1: + pred = cir::CmpOpKind::lt; + break; + case 2: + pred = cir::CmpOpKind::le; + break; + case 4: + pred = cir::CmpOpKind::ne; + break; + case 5: + pred = cir::CmpOpKind::ge; + break; + case 6: + pred = cir::CmpOpKind::gt; + break; + } + + auto resultTy = cgf.getBuilder().getType( + cgf.getBuilder().getUIntNTy(1), numElts); + cmp = cgf.getBuilder().create(loc, resultTy, pred, ops[0], + ops[1]); + } + + mlir::Value maskIn; + if (ops.size() == 4) + maskIn = ops[3]; + + return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc); +} + +static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in, + mlir::Location loc) { + cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc); + return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc); +} + mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == Builtin::BI__builtin_cpu_is) @@ -547,6 +626,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtmask2q512: return emitX86SExtMask(*this, Ops[0], convertType(E->getType()), getLoc(E->getExprLoc())); + + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return emitX86ConvertToMask(*this, Ops[0], getLoc(E->getExprLoc())); case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2pd512_mask: diff --git a/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c index 0af4327d9623..978a676f6fc1 100644 --- a/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c @@ -6,7 +6,10 @@ // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s + +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG #include @@ -73,3 +76,31 @@ __m512i test_mm512_maskz_loadu_epi8(__mmask64 __U, void const *__P) { // LLVM: @llvm.masked.load.v64i8.p0(ptr %{{.*}}, i32 1, <64 x i1> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_maskz_loadu_epi8(__U, __P); } + +__mmask64 test_mm512_movepi8_mask(__m512i __A) { + // CIR-LABEL: @_mm512_movepi8_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 64>, !cir.vector x 64> + + // LLVM-LABEL: @test_mm512_movepi8_mask + // LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer + + // In the unsigned case below, the canonicalizer proves the comparison is + // always false (no i8 unsigned value can be < 0) and folds it away. + // LLVM-UNSIGNED-CHAR: store i64 0, ptr %{{.*}}, align 8 + + // OGCG-LABEL: @test_mm512_movepi8_mask + // OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer + return _mm512_movepi8_mask(__A); +} + +__mmask32 test_mm512_movepi16_mask(__m512i __A) { + // CIR-LABEL: @_mm512_movepi16_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 32> + + // LLVM-LABEL: @test_mm512_movepi16_mask + // LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer + + // OGCG-LABEL: @test_mm512_movepi16_mask + // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer + return _mm512_movepi16_mask(__A); +} diff --git a/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c index 11a007e89cb9..cf0853b36223 100644 --- a/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c @@ -2,6 +2,7 @@ // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o %t.ll -Wall -Werror // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=OGCG #include @@ -49,4 +50,28 @@ __m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) { // LLVM-LABEL: @test_mm512_inserti64x2 // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_inserti64x2(__A, __B, 1); -} \ No newline at end of file +} + +__mmask16 test_mm512_movepi32_mask(__m512i __A) { + // CIR-LABEL: _mm512_movepi32_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 16> + + // LLVM-LABEL: @test_mm512_movepi32_mask + // LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer + + // OGCG-LABEL: @test_mm512_movepi32_mask + // OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer + return _mm512_movepi32_mask(__A); +} + +__mmask8 test_mm512_movepi64_mask(__m512i __A) { + // CIR-LABEL: @_mm512_movepi64_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 8> + + // LLVM-LABEL: @test_mm512_movepi64_mask + // LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer + + // OGCG-LABEL: @test_mm512_movepi64_mask + // OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer + return _mm512_movepi64_mask(__A); +} diff --git a/clang/test/CIR/CodeGen/X86/avx512vlbw-buiiltins.c b/clang/test/CIR/CodeGen/X86/avx512vlbw-buiiltins.c index 358d8ead5395..19a3c6ce47d3 100644 --- a/clang/test/CIR/CodeGen/X86/avx512vlbw-buiiltins.c +++ b/clang/test/CIR/CodeGen/X86/avx512vlbw-buiiltins.c @@ -6,13 +6,16 @@ // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG #include @@ -188,3 +191,31 @@ __m512i test_mm512_maskz_load_epi32(__mmask16 __U, void const *__P) { // LLVM: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_maskz_load_epi32(__U, __P); } + +__mmask16 test_mm_movepi8_mask(__m128i __A) { + // CIR-LABEL: _mm_movepi8_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 16>, !cir.vector x 16> + + // LLVM-LABEL: @test_mm_movepi8_mask + // LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer + + // In the unsigned case below, the canonicalizer proves the comparison is + // always false (no i8 unsigned value can be < 0) and folds it away. + // LLVM-UNSIGNED-CHAR: store i16 0, ptr %{{.*}}, align 2 + + // OGCG-LABEL: @test_mm_movepi8_mask + // OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer + return _mm_movepi8_mask(__A); +} + +__mmask16 test_mm256_movepi16_mask(__m256i __A) { + // CIR-LABEL: _mm256_movepi16_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 16> + + // LLVM-LABEL: @test_mm256_movepi16_mask + // LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer + + // OGCG-LABEL: @test_mm256_movepi16_mask + // OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer + return _mm256_movepi16_mask(__A); +} diff --git a/clang/test/CIR/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CIR/CodeGen/X86/avx512vldq-builtins.c index aa13d18f7d10..b202405e824a 100644 --- a/clang/test/CIR/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512vldq-builtins.c @@ -2,6 +2,7 @@ // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG #include @@ -83,3 +84,43 @@ __m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) { // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> return _mm256_inserti64x2(__A, __B, 1); } + +__mmask8 test_mm256_movepi32_mask(__m256i __A) { + // LLVM-LABEL: @test_mm256_movepi32_mask + // LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer + + // OGCG-LABEL: @test_mm256_movepi32_mask + // OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer + return _mm256_movepi32_mask(__A); +} + +__mmask8 test_mm_movepi64_mask(__m128i __A) { + // CIR-LABEL: _mm_movepi64_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 2> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector x 8> + // CIR: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.vector x 8>), !u8i + + // LLVM-LABEL: @test_mm_movepi64_mask + // LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer + // LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> + + // OGCG-LABEL: @test_mm_movepi64_mask + // OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer + // OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> + return _mm_movepi64_mask(__A); +} + +__mmask8 test_mm256_movepi64_mask(__m256i __A) { + // CIR-LABEL: _mm256_movepi64_mask + // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector, !cir.vector x 4> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector x 8> + + // LLVM-LABEL: @test_mm256_movepi64_mask + // LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer + // LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> + + // OGCG-LABEL: @test_mm256_movepi64_mask + // OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer + // OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> + return _mm256_movepi64_mask(__A); +}