-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[CIR][X86] Add support for vpcom builtins
#170362
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clangir Author: Vishruth Thimmaiah (vishruth-thimmaiah) Changes: Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` X86 builtins. Part of #167765. Full diff: https://github.com/llvm/llvm-project/pull/170362.diff 2 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 224a182ed17d1..7f8cd2768a10f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -168,6 +168,62 @@ static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
}
+/// Emit CIR for the XOP `vpcom` / `vpcomu` vector-compare builtins.
+///
+/// \param builder   Builder used to create the resulting operations.
+/// \param loc       Location attached to the created operations.
+/// \param ops       Builtin operands: the two vectors to compare followed by
+///                  the predicate immediate. Only the low three bits of the
+///                  immediate are used.
+/// \param isSigned  True for the `vpcom*` builtins, false for `vpcomu*`.
+/// \returns The element-wise comparison for predicates 0-5 (lt, le, gt, ge,
+///          eq, ne), a zero vector for predicate 6 and an all-ones splat for
+///          predicate 7.
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+                                llvm::ArrayRef<mlir::Value> ops,
+                                bool isSigned) {
+  assert(ops.size() >= 3 &&
+         "vpcom builtins take two vectors and a predicate immediate");
+  mlir::Value lhs = ops[0];
+  mlir::Value rhs = ops[1];
+
+  auto vecTy = cast<cir::VectorType>(lhs.getType());
+  auto elementTy = cast<cir::IntType>(vecTy.getElementType());
+
+  // The predicate must come from a constant op; check it explicitly rather
+  // than calling through a null op handle.
+  auto immOp = ops[2].getDefiningOp<cir::ConstantOp>();
+  assert(immOp && "vpcom predicate must be an integer constant");
+  uint64_t imm = immOp.getIntValue().getZExtValue() & 0x7;
+
+  cir::CmpOpKind pred;
+  switch (imm) {
+  case 0x0:
+    pred = cir::CmpOpKind::lt;
+    break;
+  case 0x1:
+    pred = cir::CmpOpKind::le;
+    break;
+  case 0x2:
+    pred = cir::CmpOpKind::gt;
+    break;
+  case 0x3:
+    pred = cir::CmpOpKind::ge;
+    break;
+  case 0x4:
+    pred = cir::CmpOpKind::eq;
+    break;
+  case 0x5:
+    pred = cir::CmpOpKind::ne;
+    break;
+  case 0x6:
+    return builder.getNullValue(vecTy, loc); // FALSE
+  case 0x7: {
+    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
+    return cir::VecSplatOp::create(
+        builder, loc, vecTy,
+        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+  }
+  default:
+    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+  }
+
+  // The signedness of the emitted compare follows the operand element type,
+  // so reinterpret the operands whenever that type disagrees with the
+  // builtin's signedness. This covers both directions: signed lanes passed
+  // to vpcomu*, and unsigned lanes passed to vpcom*.
+  if (elementTy.isSigned() != isSigned) {
+    unsigned width = elementTy.getWidth();
+    cir::IntType cmpElementTy =
+        isSigned ? builder.getSIntNTy(width) : builder.getUIntNTy(width);
+    auto cmpVecTy = cir::VectorType::get(cmpElementTy, vecTy.getSize());
+    lhs = builder.createBitcast(lhs, cmpVecTy);
+    rhs = builder.createBitcast(rhs, cmpVecTy);
+  }
+
+  return builder.createVecCompare(loc, pred, lhs, rhs);
+}
+
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -900,14 +956,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_ucmpq128_mask:
case X86::BI__builtin_ia32_ucmpq256_mask:
case X86::BI__builtin_ia32_ucmpq512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_vpcomb:
case X86::BI__builtin_ia32_vpcomw:
case X86::BI__builtin_ia32_vpcomd:
case X86::BI__builtin_ia32_vpcomq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
case X86::BI__builtin_ia32_vpcomub:
case X86::BI__builtin_ia32_vpcomuw:
case X86::BI__builtin_ia32_vpcomud:
case X86::BI__builtin_ia32_vpcomuq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
case X86::BI__builtin_ia32_kortestcsi:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
new file mode 100644
index 0000000000000..10ef735fbbdb7
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
@@ -0,0 +1,217 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+// This test mimics clang/test/CodeGen/X86/xop-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <x86intrin.h>
+
+__m128i test_mm_com_epu8(__m128i a, __m128i b) { // Predicate 0 on unsigned 8-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu8
+ // CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+ // CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu8
+ // LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu8
+ // OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+ return _mm_com_epu8(a, b, 0);
+}
+
+__m128i test_mm_com_epu16(__m128i a, __m128i b) { // Predicate 0 on unsigned 16-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu16
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu16
+ // LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu16
+ // OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+ return _mm_com_epu16(a, b, 0);
+}
+
+__m128i test_mm_com_epu32(__m128i a, __m128i b) { // Predicate 0 on unsigned 32-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu32
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32
+ // LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu32
+ // OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+ return _mm_com_epu32(a, b, 0);
+}
+
+__m128i test_mm_com_epu64(__m128i a, __m128i b) { // Predicate 0 on unsigned 64-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu64
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu64
+ // LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu64
+ // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+ return _mm_com_epu64(a, b, 0);
+}
+
+__m128i test_mm_com_epi8(__m128i a, __m128i b) { // Predicate 0 on signed 8-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi8
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi8
+ // LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi8
+ // OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+ return _mm_com_epi8(a, b, 0);
+}
+
+__m128i test_mm_com_epi16(__m128i a, __m128i b) { // Predicate 0 on signed 16-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi16
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi16
+ // LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi16
+ // OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+ return _mm_com_epi16(a, b, 0);
+}
+
+__m128i test_mm_com_epi32(__m128i a, __m128i b) { // Predicate 0 on signed 32-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi32
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32
+ // LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi32
+ // OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+ return _mm_com_epi32(a, b, 0);
+}
+
+__m128i test_mm_com_epi64(__m128i a, __m128i b) { // Predicate 0 on signed 64-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi64
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi64
+ // LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi64
+ // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+ return _mm_com_epi64(a, b, 0);
+}
+
+__m128i test_mm_com_epi32_false(__m128i a, __m128i b) { // Predicate 6 (always false), signed form; expects an all-zero vector.
+ // CIR-LABEL: test_mm_com_epi32_false
+ // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32_false
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+ // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+ // LLVM: ret <2 x i64> %[[ZERO]]
+
+ // OGCG-LABEL: test_mm_com_epi32_false
+ // OGCG: ret <2 x i64> zeroinitializer
+ return _mm_com_epi32(a, b, 6);
+}
+
+__m128i test_mm_com_epu32_false(__m128i a, __m128i b) { // Predicate 6 (always false), unsigned form; expects an all-zero vector.
+ // CIR-LABEL: test_mm_com_epu32_false
+ // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32_false
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+ // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+ // LLVM: ret <2 x i64> %[[ZERO]]
+
+ // OGCG-LABEL: test_mm_com_epu32_false
+ // OGCG: ret <2 x i64> zeroinitializer
+ return _mm_com_epu32(a, b, 6);
+}
+
+__m128i test_mm_com_epi32_true(__m128i a, __m128i b) { // Predicate 7 (always true), signed form; expects an all-ones splat.
+ // CIR-LABEL: test_mm_com_epi32_true
+ // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+ // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32_true
+ // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+ // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+ // LLVM: ret <2 x i64> %[[SPLAT]]
+
+ // OGCG-LABEL: test_mm_com_epi32_true
+ // OGCG: ret <2 x i64> splat (i64 -1)
+ return _mm_com_epi32(a, b, 7);
+}
+
+__m128i test_mm_com_epu32_true(__m128i a, __m128i b) { // Predicate 7 (always true), unsigned form; expects an all-ones splat.
+ // CIR-LABEL: test_mm_com_epu32_true
+ // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+ // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32_true
+ // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+ // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+ // LLVM: ret <2 x i64> %[[SPLAT]]
+
+ // OGCG-LABEL: test_mm_com_epu32_true
+ // OGCG: ret <2 x i64> splat (i64 -1)
+ return _mm_com_epu32(a, b, 7);
+}
|
|
@llvm/pr-subscribers-clang Author: Vishruth Thimmaiah (vishruth-thimmaiah) Changes: Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` X86 builtins. Part of #167765. Full diff: https://github.com/llvm/llvm-project/pull/170362.diff 2 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 224a182ed17d1..7f8cd2768a10f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -168,6 +168,62 @@ static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
}
+/// Emit CIR for the XOP `vpcom` / `vpcomu` vector-compare builtins.
+///
+/// \param builder   Builder used to create the resulting operations.
+/// \param loc       Location attached to the created operations.
+/// \param ops       Builtin operands: the two vectors to compare followed by
+///                  the predicate immediate. Only the low three bits of the
+///                  immediate are used.
+/// \param isSigned  True for the `vpcom*` builtins, false for `vpcomu*`.
+/// \returns The element-wise comparison for predicates 0-5 (lt, le, gt, ge,
+///          eq, ne), a zero vector for predicate 6 and an all-ones splat for
+///          predicate 7.
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+                                llvm::ArrayRef<mlir::Value> ops,
+                                bool isSigned) {
+  assert(ops.size() >= 3 &&
+         "vpcom builtins take two vectors and a predicate immediate");
+  mlir::Value lhs = ops[0];
+  mlir::Value rhs = ops[1];
+
+  auto vecTy = cast<cir::VectorType>(lhs.getType());
+  auto elementTy = cast<cir::IntType>(vecTy.getElementType());
+
+  // The predicate must come from a constant op; check it explicitly rather
+  // than calling through a null op handle.
+  auto immOp = ops[2].getDefiningOp<cir::ConstantOp>();
+  assert(immOp && "vpcom predicate must be an integer constant");
+  uint64_t imm = immOp.getIntValue().getZExtValue() & 0x7;
+
+  cir::CmpOpKind pred;
+  switch (imm) {
+  case 0x0:
+    pred = cir::CmpOpKind::lt;
+    break;
+  case 0x1:
+    pred = cir::CmpOpKind::le;
+    break;
+  case 0x2:
+    pred = cir::CmpOpKind::gt;
+    break;
+  case 0x3:
+    pred = cir::CmpOpKind::ge;
+    break;
+  case 0x4:
+    pred = cir::CmpOpKind::eq;
+    break;
+  case 0x5:
+    pred = cir::CmpOpKind::ne;
+    break;
+  case 0x6:
+    return builder.getNullValue(vecTy, loc); // FALSE
+  case 0x7: {
+    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
+    return cir::VecSplatOp::create(
+        builder, loc, vecTy,
+        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+  }
+  default:
+    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+  }
+
+  // The signedness of the emitted compare follows the operand element type,
+  // so reinterpret the operands whenever that type disagrees with the
+  // builtin's signedness. This covers both directions: signed lanes passed
+  // to vpcomu*, and unsigned lanes passed to vpcom*.
+  if (elementTy.isSigned() != isSigned) {
+    unsigned width = elementTy.getWidth();
+    cir::IntType cmpElementTy =
+        isSigned ? builder.getSIntNTy(width) : builder.getUIntNTy(width);
+    auto cmpVecTy = cir::VectorType::get(cmpElementTy, vecTy.getSize());
+    lhs = builder.createBitcast(lhs, cmpVecTy);
+    rhs = builder.createBitcast(rhs, cmpVecTy);
+  }
+
+  return builder.createVecCompare(loc, pred, lhs, rhs);
+}
+
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -900,14 +956,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_ucmpq128_mask:
case X86::BI__builtin_ia32_ucmpq256_mask:
case X86::BI__builtin_ia32_ucmpq512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_vpcomb:
case X86::BI__builtin_ia32_vpcomw:
case X86::BI__builtin_ia32_vpcomd:
case X86::BI__builtin_ia32_vpcomq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
case X86::BI__builtin_ia32_vpcomub:
case X86::BI__builtin_ia32_vpcomuw:
case X86::BI__builtin_ia32_vpcomud:
case X86::BI__builtin_ia32_vpcomuq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
case X86::BI__builtin_ia32_kortestcsi:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
new file mode 100644
index 0000000000000..10ef735fbbdb7
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
@@ -0,0 +1,217 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+// This test mimics clang/test/CodeGen/X86/xop-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <x86intrin.h>
+
+__m128i test_mm_com_epu8(__m128i a, __m128i b) { // Predicate 0 on unsigned 8-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu8
+ // CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+ // CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu8
+ // LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu8
+ // OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+ return _mm_com_epu8(a, b, 0);
+}
+
+__m128i test_mm_com_epu16(__m128i a, __m128i b) { // Predicate 0 on unsigned 16-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu16
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu16
+ // LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu16
+ // OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+ return _mm_com_epu16(a, b, 0);
+}
+
+__m128i test_mm_com_epu32(__m128i a, __m128i b) { // Predicate 0 on unsigned 32-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu32
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32
+ // LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu32
+ // OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+ return _mm_com_epu32(a, b, 0);
+}
+
+__m128i test_mm_com_epu64(__m128i a, __m128i b) { // Predicate 0 on unsigned 64-bit lanes; expects an unsigned less-than compare.
+ // CIR-LABEL: test_mm_com_epu64
+ // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+ // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu64
+ // LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epu64
+ // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+ return _mm_com_epu64(a, b, 0);
+}
+
+__m128i test_mm_com_epi8(__m128i a, __m128i b) { // Predicate 0 on signed 8-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi8
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi8
+ // LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi8
+ // OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+ // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+ return _mm_com_epi8(a, b, 0);
+}
+
+__m128i test_mm_com_epi16(__m128i a, __m128i b) { // Predicate 0 on signed 16-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi16
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi16
+ // LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi16
+ // OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+ // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+ return _mm_com_epi16(a, b, 0);
+}
+
+__m128i test_mm_com_epi32(__m128i a, __m128i b) { // Predicate 0 on signed 32-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi32
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+ // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32
+ // LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi32
+ // OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+ // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+ return _mm_com_epi32(a, b, 0);
+}
+
+__m128i test_mm_com_epi64(__m128i a, __m128i b) { // Predicate 0 on signed 64-bit lanes; expects a signed less-than compare.
+ // CIR-LABEL: test_mm_com_epi64
+ // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi64
+ // LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+ // OGCG-LABEL: test_mm_com_epi64
+ // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+ // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+ return _mm_com_epi64(a, b, 0);
+}
+
+__m128i test_mm_com_epi32_false(__m128i a, __m128i b) { // Predicate 6 (always false), signed form; expects an all-zero vector.
+ // CIR-LABEL: test_mm_com_epi32_false
+ // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32_false
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+ // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+ // LLVM: ret <2 x i64> %[[ZERO]]
+
+ // OGCG-LABEL: test_mm_com_epi32_false
+ // OGCG: ret <2 x i64> zeroinitializer
+ return _mm_com_epi32(a, b, 6);
+}
+
+__m128i test_mm_com_epu32_false(__m128i a, __m128i b) { // Predicate 6 (always false), unsigned form; expects an all-zero vector.
+ // CIR-LABEL: test_mm_com_epu32_false
+ // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32_false
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+ // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+ // LLVM: ret <2 x i64> %[[ZERO]]
+
+ // OGCG-LABEL: test_mm_com_epu32_false
+ // OGCG: ret <2 x i64> zeroinitializer
+ return _mm_com_epu32(a, b, 6);
+}
+
+__m128i test_mm_com_epi32_true(__m128i a, __m128i b) { // Predicate 7 (always true), signed form; expects an all-ones splat.
+ // CIR-LABEL: test_mm_com_epi32_true
+ // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+ // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epi32_true
+ // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+ // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+ // LLVM: ret <2 x i64> %[[SPLAT]]
+
+ // OGCG-LABEL: test_mm_com_epi32_true
+ // OGCG: ret <2 x i64> splat (i64 -1)
+ return _mm_com_epi32(a, b, 7);
+}
+
+__m128i test_mm_com_epu32_true(__m128i a, __m128i b) { // Predicate 7 (always true), unsigned form; expects an all-ones splat.
+ // CIR-LABEL: test_mm_com_epu32_true
+ // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+ // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+ // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: test_mm_com_epu32_true
+ // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+ // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+ // LLVM: ret <2 x i64> %[[SPLAT]]
+
+ // OGCG-LABEL: test_mm_com_epu32_true
+ // OGCG: ret <2 x i64> splat (i64 -1)
+ return _mm_com_epu32(a, b, 7);
+}
|
andykaylor
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good. I have just a couple of requests.
bcardosolopes
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have nothing else to add, LGTM
ba58425 to
8692dc8
Compare
Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` builtins. Signed-off-by: vishruth-thimmaiah <vishruththimmaiah@gmail.com>
Signed-off-by: vishruth-thimmaiah <vishruththimmaiah@gmail.com>
8692dc8 to
7af4f9f
Compare
andykaylor
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` X86 builtins.
Part of #167765