Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
return cir::ConstantOp::create(*this, loc, attr);
}

// Creates constant null value for integral type ty.
// Creates constant null value for the given type ty.
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc) {
return cir::ConstantOp::create(*this, loc, getZeroInitAttr(ty));
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CIR/CodeGen/CIRGenBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
/// Create a unary shuffle. The second vector operand of the IR instruction
/// is poison.
return createVecShuffle(
loc, vec1, getConstant(loc, cir::PoisonAttr::get(vec1.getType())),
loc, vec1, getConstant(loc, getAttr<cir::PoisonAttr>(vec1.getType())),
mask);
}

Expand Down
93 changes: 93 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,85 @@ static mlir::Value emitX86PSLLDQIByteShift(CIRGenFunction &cgf,
return builder.createBitcast(shuffleResult, resultType);
}

static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf,
mlir::Value cmp, unsigned numElts,
mlir::Value maskIn,
mlir::Location loc) {
if (maskIn) {
llvm_unreachable("NYI");
}
if (numElts < 8) {
int64_t indices[8];
for (unsigned i = 0; i != numElts; ++i)
indices[i] = i;
for (unsigned i = numElts; i != 8; ++i)
indices[i] = i % numElts + numElts;

// This should shuffle between cmp (first vector) and null (second vector)
mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc);
cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices);
}
return cgf.getBuilder().createBitcast(
cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U)));
}

static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
bool isSigned,
ArrayRef<mlir::Value> ops,
mlir::Location loc) {
assert((ops.size() == 2 || ops.size() == 4) &&
"Unexpected number of arguments");
unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
mlir::Value cmp;

if (cc == 3) {
llvm_unreachable("NYI");
} else if (cc == 7) {
llvm_unreachable("NYI");
} else {
cir::CmpOpKind pred;
switch (cc) {
default:
llvm_unreachable("Unknown condition code");
case 0:
pred = cir::CmpOpKind::eq;
break;
case 1:
pred = cir::CmpOpKind::lt;
break;
case 2:
pred = cir::CmpOpKind::le;
break;
case 4:
pred = cir::CmpOpKind::ne;
break;
case 5:
pred = cir::CmpOpKind::ge;
break;
case 6:
pred = cir::CmpOpKind::gt;
break;
}

auto resultTy = cgf.getBuilder().getType<cir::VectorType>(
cgf.getBuilder().getUIntNTy(1), numElts);
cmp = cgf.getBuilder().create<cir::VecCmpOp>(loc, resultTy, pred, ops[0],
ops[1]);
}

mlir::Value maskIn;
if (ops.size() == 4)
maskIn = ops[3];

return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc);
}

static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in,
mlir::Location loc) {
cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc);
return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc);
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == Builtin::BI__builtin_cpu_is)
Expand Down Expand Up @@ -547,6 +626,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtmask2q512:
return emitX86SExtMask(*this, Ops[0], convertType(E->getType()),
getLoc(E->getExprLoc()));

case X86::BI__builtin_ia32_cvtb2mask128:
case X86::BI__builtin_ia32_cvtb2mask256:
case X86::BI__builtin_ia32_cvtb2mask512:
case X86::BI__builtin_ia32_cvtw2mask128:
case X86::BI__builtin_ia32_cvtw2mask256:
case X86::BI__builtin_ia32_cvtw2mask512:
case X86::BI__builtin_ia32_cvtd2mask128:
case X86::BI__builtin_ia32_cvtd2mask256:
case X86::BI__builtin_ia32_cvtd2mask512:
case X86::BI__builtin_ia32_cvtq2mask128:
case X86::BI__builtin_ia32_cvtq2mask256:
case X86::BI__builtin_ia32_cvtq2mask512:
return emitX86ConvertToMask(*this, Ops[0], getLoc(E->getExprLoc()));
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2pd512_mask:
Expand Down
33 changes: 32 additions & 1 deletion clang/test/CIR/CodeGen/X86/avx512bw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG

#include <immintrin.h>

Expand Down Expand Up @@ -73,3 +76,31 @@ __m512i test_mm512_maskz_loadu_epi8(__mmask64 __U, void const *__P) {
// LLVM: @llvm.masked.load.v64i8.p0(ptr %{{.*}}, i32 1, <64 x i1> %{{.*}}, <64 x i8> %{{.*}})
return _mm512_maskz_loadu_epi8(__U, __P);
}

__mmask64 test_mm512_movepi8_mask(__m512i __A) {
// CIR-LABEL: @_mm512_movepi8_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 64>, !cir.vector<!cir.int<u, 1> x 64>

// LLVM-LABEL: @test_mm512_movepi8_mask
// LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer

// In the unsigned case below, the canonicalizer proves the comparison is
// always false (no i8 unsigned value can be < 0) and folds it away.
// LLVM-UNSIGNED-CHAR: store i64 0, ptr %{{.*}}, align 8

// OGCG-LABEL: @test_mm512_movepi8_mask
// OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
return _mm512_movepi8_mask(__A);
}

__mmask32 test_mm512_movepi16_mask(__m512i __A) {
// CIR-LABEL: @_mm512_movepi16_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 32>, !cir.vector<!cir.int<u, 1> x 32>

// LLVM-LABEL: @test_mm512_movepi16_mask
// LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer

// OGCG-LABEL: @test_mm512_movepi16_mask
// OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
return _mm512_movepi16_mask(__A);
}
27 changes: 26 additions & 1 deletion clang/test/CIR/CodeGen/X86/avx512dq-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=OGCG

#include <immintrin.h>

Expand Down Expand Up @@ -49,4 +50,28 @@ __m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
// LLVM-LABEL: @test_mm512_inserti64x2
// LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
return _mm512_inserti64x2(__A, __B, 1);
}
}

__mmask16 test_mm512_movepi32_mask(__m512i __A) {
// CIR-LABEL: _mm512_movepi32_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s32i x 16>, !cir.vector<!cir.int<u, 1> x 16>

// LLVM-LABEL: @test_mm512_movepi32_mask
// LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer

// OGCG-LABEL: @test_mm512_movepi32_mask
// OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
return _mm512_movepi32_mask(__A);
}

__mmask8 test_mm512_movepi64_mask(__m512i __A) {
// CIR-LABEL: @_mm512_movepi64_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 8>, !cir.vector<!cir.int<u, 1> x 8>

// LLVM-LABEL: @test_mm512_movepi64_mask
// LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer

// OGCG-LABEL: @test_mm512_movepi64_mask
// OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
return _mm512_movepi64_mask(__A);
}
33 changes: 32 additions & 1 deletion clang/test/CIR/CodeGen/X86/avx512vlbw-buiiltins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

Expand Down Expand Up @@ -188,3 +191,31 @@ __m512i test_mm512_maskz_load_epi32(__mmask16 __U, void const *__P) {
// LLVM: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
return _mm512_maskz_load_epi32(__U, __P);
}

__mmask16 test_mm_movepi8_mask(__m128i __A) {
// CIR-LABEL: _mm_movepi8_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 16>, !cir.vector<!cir.int<u, 1> x 16>

// LLVM-LABEL: @test_mm_movepi8_mask
// LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer

// In the unsigned case below, the canonicalizer proves the comparison is
// always false (no i8 unsigned value can be < 0) and folds it away.
// LLVM-UNSIGNED-CHAR: store i16 0, ptr %{{.*}}, align 2

// OGCG-LABEL: @test_mm_movepi8_mask
// OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
return _mm_movepi8_mask(__A);
}

__mmask16 test_mm256_movepi16_mask(__m256i __A) {
// CIR-LABEL: _mm256_movepi16_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 16>, !cir.vector<!cir.int<u, 1> x 16>

// LLVM-LABEL: @test_mm256_movepi16_mask
// LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer

// OGCG-LABEL: @test_mm256_movepi16_mask
// OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
return _mm256_movepi16_mask(__A);
}
41 changes: 41 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512vldq-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

Expand Down Expand Up @@ -83,3 +84,43 @@ __m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
// LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm256_inserti64x2(__A, __B, 1);
}

__mmask8 test_mm256_movepi32_mask(__m256i __A) {
// LLVM-LABEL: @test_mm256_movepi32_mask
// LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer

// OGCG-LABEL: @test_mm256_movepi32_mask
// OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
return _mm256_movepi32_mask(__A);
}

__mmask8 test_mm_movepi64_mask(__m128i __A) {
// CIR-LABEL: _mm_movepi64_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 2>, !cir.vector<!cir.int<u, 1> x 2>
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
// CIR: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.vector<!cir.int<u, 1> x 8>), !u8i

// LLVM-LABEL: @test_mm_movepi64_mask
// LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
// LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>

// OGCG-LABEL: @test_mm_movepi64_mask
// OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
// OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
return _mm_movepi64_mask(__A);
}

__mmask8 test_mm256_movepi64_mask(__m256i __A) {
// CIR-LABEL: _mm256_movepi64_mask
// CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 4>, !cir.vector<!cir.int<u, 1> x 4>
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>

// LLVM-LABEL: @test_mm256_movepi64_mask
// LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
// LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

// OGCG-LABEL: @test_mm256_movepi64_mask
// OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
// OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
return _mm256_movepi64_mask(__A);
}
Loading