From 6a68853bccd6f1688a54aa7ce81dd9fc5fd00538 Mon Sep 17 00:00:00 2001
From: vishruth-thimmaiah
Date: Sat, 29 Nov 2025 17:54:09 +0530
Subject: [PATCH 1/2] [CIR][X86] Add support for `vpcom` builtins

Adds support for the `__builtin_ia32_vpcom*` and `__builtin_ia32_vpcomu*`
builtin families (the XOP `vpcom`/`vpcomu` vector comparisons).

Signed-off-by: vishruth-thimmaiah
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  66 +++++-
 .../CIR/CodeGenBuiltins/X86/xop-builtins.c    | 200 ++++++++++++++++++
 2 files changed, 262 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 9806b00a54e86..7e338f96d2167 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -315,6 +315,62 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+                                llvm::SmallVector<mlir::Value> ops,
+                                bool isSigned) {
+  mlir::Value op0 = ops[0];
+  mlir::Value op1 = ops[1];
+
+  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
+  mlir::Type elementTy = ty.getElementType();
+
+  uint64_t imm =
+      ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
+      0x7;
+
+  cir::CmpOpKind pred;
+  switch (imm) {
+  case 0x0:
+    pred = cir::CmpOpKind::lt;
+    break;
+  case 0x1:
+    pred = cir::CmpOpKind::le;
+    break;
+  case 0x2:
+    pred = cir::CmpOpKind::gt;
+    break;
+  case 0x3:
+    pred = cir::CmpOpKind::ge;
+    break;
+  case 0x4:
+    pred = cir::CmpOpKind::eq;
+    break;
+  case 0x5:
+    pred = cir::CmpOpKind::ne;
+    break;
+  case 0x6:
+    return builder.getNullValue(ty, loc); // FALSE
+  case 0x7: {
+    llvm::APInt allOnes =
+        llvm::APInt::getAllOnes(cast<cir::IntType>(elementTy).getWidth());
+    return cir::VecSplatOp::create(
+        builder, loc, ty,
+        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+  }
+  default:
+    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+  }
+
+  if (!isSigned) {
+    elementTy = builder.getUIntNTy(cast<cir::IntType>(elementTy).getWidth());
+    ty = cir::VectorType::get(elementTy, ty.getSize());
+    op0 = builder.createBitcast(op0, ty);
+    op1 = builder.createBitcast(op1, ty);
+  }
+
+  return builder.createVecCompare(loc, pred, op0, op1);
+}
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1159,18 +1215,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_ucmpq128_mask:
   case X86::BI__builtin_ia32_ucmpq256_mask:
   case X86::BI__builtin_ia32_ucmpq512_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_vpcomb:
   case X86::BI__builtin_ia32_vpcomw:
   case X86::BI__builtin_ia32_vpcomd:
   case X86::BI__builtin_ia32_vpcomq:
+    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
   case X86::BI__builtin_ia32_vpcomub:
   case X86::BI__builtin_ia32_vpcomuw:
   case X86::BI__builtin_ia32_vpcomud:
   case X86::BI__builtin_ia32_vpcomuq:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
+    return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
   case X86::BI__builtin_ia32_kortestcqi:
   case X86::BI__builtin_ia32_kortestchi:
   case X86::BI__builtin_ia32_kortestcsi:
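For reference, the 3-bit immediate selects the predicate exactly as in the
switch above: 0 through 5 map to lt, le, gt, ge, eq, and ne, while 6 and 7 are
the constant FALSE and TRUE forms. A minimal scalar model of one unsigned byte
lane (an illustrative sketch, not part of the patch; the helper name
vpcomub_lane is invented):

    #include <stdint.h>

    /* Models one lane of vpcomub: all-ones if the predicate holds, else
       zero, mirroring the sext-of-i1 lane mask the tests below check for. */
    static uint8_t vpcomub_lane(uint8_t x, uint8_t y, int imm) {
      switch (imm & 0x7) {
      case 0: return x <  y ? 0xFF : 0x00; /* lt */
      case 1: return x <= y ? 0xFF : 0x00; /* le */
      case 2: return x >  y ? 0xFF : 0x00; /* gt */
      case 3: return x >= y ? 0xFF : 0x00; /* ge */
      case 4: return x == y ? 0xFF : 0x00; /* eq */
      case 5: return x != y ? 0xFF : 0x00; /* ne */
      case 6: return 0x00;                 /* FALSE */
      default: return 0xFF;                /* TRUE (imm == 7) */
      }
    }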
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
index 0aaba7b46327d..2a8a107684e42 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
@@ -90,3 +90,203 @@ __m128i test_mm_roti_epi64(__m128i a) {
   // OGCG: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))
   return _mm_roti_epi64(a, 100);
 }
+
+__m128i test_mm_com_epu8(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu8
+  // CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+  // CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu8
+  // LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epu8
+  // OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+  return _mm_com_epu8(a, b, 0);
+}
+
+__m128i test_mm_com_epu16(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu16
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu16
+  // LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epu16
+  // OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+  return _mm_com_epu16(a, b, 0);
+}
+
+__m128i test_mm_com_epu32(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu32
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu32
+  // LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epu32
+  // OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+  return _mm_com_epu32(a, b, 0);
+}
+
+__m128i test_mm_com_epu64(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu64
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu64
+  // LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epu64
+  // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  return _mm_com_epu64(a, b, 0);
+}
+
+__m128i test_mm_com_epi8(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi8
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi8
+  // LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epi8
+  // OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
+  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
+  return _mm_com_epi8(a, b, 0);
+}
+
+__m128i test_mm_com_epi16(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi16
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi16
+  // LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epi16
+  // OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
+  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
+  return _mm_com_epi16(a, b, 0);
+}
+
+__m128i test_mm_com_epi32(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi32
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi32
+  // LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epi32
+  // OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
+  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
+  return _mm_com_epi32(a, b, 0);
+}
+
+__m128i test_mm_com_epi64(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi64
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi64
+  // LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_com_epi64
+  // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  return _mm_com_epi64(a, b, 0);
+}
+
+__m128i test_mm_com_epi32_false(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi32_false
+  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi32_false
+  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+  // LLVM: ret <2 x i64> %[[ZERO]]
+
+  // OGCG-LABEL: test_mm_com_epi32_false
+  // OGCG: ret <2 x i64> zeroinitializer
+  return _mm_com_epi32(a, b, 6);
+}
+
+__m128i test_mm_com_epu32_false(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu32_false
+  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu32_false
+  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
+  // LLVM: ret <2 x i64> %[[ZERO]]
+
+  // OGCG-LABEL: test_mm_com_epu32_false
+  // OGCG: ret <2 x i64> zeroinitializer
+  return _mm_com_epu32(a, b, 6);
+}
+
+__m128i test_mm_com_epi32_true(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epi32_true
+  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epi32_true
+  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+  // LLVM: ret <2 x i64> %[[SPLAT]]
+
+  // OGCG-LABEL: test_mm_com_epi32_true
+  // OGCG: ret <2 x i64> splat (i64 -1)
+  return _mm_com_epi32(a, b, 7);
+}
+
+__m128i test_mm_com_epu32_true(__m128i a, __m128i b) {
+  // CIR-LABEL: test_mm_com_epu32_true
+  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
+  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_com_epu32_true
+  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
+  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
+  // LLVM: ret <2 x i64> %[[SPLAT]]
+
+  // OGCG-LABEL: test_mm_com_epu32_true
+  // OGCG: ret <2 x i64> splat (i64 -1)
+  return _mm_com_epu32(a, b, 7);
+}
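Worth noting from the last four tests above: predicates 6 and 7 never inspect
the operands, so both the CIR and the final LLVM output fold to constant
all-zeros and all-ones vectors. A small usage sketch (assumes an XOP-enabled
target such as -mxop; not part of the patch):

    #include <x86intrin.h>

    __m128i always_false(__m128i a, __m128i b) {
      return _mm_com_epi32(a, b, 6); /* folds to zeroinitializer */
    }

    __m128i always_true(__m128i a, __m128i b) {
      return _mm_com_epi32(a, b, 7); /* folds to splat (i64 -1) */
    }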
From 7af4f9f97e59324974ab2ca394624b0783336a1d Mon Sep 17 00:00:00 2001
From: vishruth-thimmaiah
Date: Thu, 4 Dec 2025 21:39:02 +0530
Subject: [PATCH 2/2] Add extra tests

Signed-off-by: vishruth-thimmaiah
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  15 ++-
 .../CIR/CodeGenBuiltins/X86/xop-builtins.c    | 108 +++++++++++++++++-
 2 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 7e338f96d2167..1c1ef4da20b0d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -322,11 +322,9 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
   mlir::Value op1 = ops[1];
 
   cir::VectorType ty = cast<cir::VectorType>(op0.getType());
-  mlir::Type elementTy = ty.getElementType();
+  cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
 
-  uint64_t imm =
-      ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
-      0x7;
+  uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
 
   cir::CmpOpKind pred;
   switch (imm) {
@@ -351,8 +349,7 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
   case 0x6:
     return builder.getNullValue(ty, loc); // FALSE
   case 0x7: {
-    llvm::APInt allOnes =
-        llvm::APInt::getAllOnes(cast<cir::IntType>(elementTy).getWidth());
+    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
     return cir::VecSplatOp::create(
         builder, loc, ty,
         builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
@@ -361,8 +358,10 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
     llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
   }
 
-  if (!isSigned) {
-    elementTy = builder.getUIntNTy(cast<cir::IntType>(elementTy).getWidth());
+  if ((!isSigned && elementTy.isSigned()) ||
+      (isSigned && elementTy.isUnsigned())) {
+    elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
+                                     : builder.getSIntNTy(elementTy.getWidth());
     ty = cir::VectorType::get(elementTy, ty.getSize());
     op0 = builder.createBitcast(op0, ty);
     op1 = builder.createBitcast(op1, ty);
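The reworked guard above now bitcasts only when the element signedness
actually disagrees with the requested comparison (patch 1 cast unconditionally
for the unsigned builtins), and it also covers unsigned element types compared
as signed. The cast matters because signed and unsigned orderings diverge once
the top bit is set; a standalone scalar illustration (a sketch, not part of
the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint8_t x = 0xFF, y = 0x01;
      /* Same bit patterns, different orderings: 255 > 1 unsigned,
         but reinterpreted as int8_t the comparison becomes -1 > 1. */
      printf("unsigned gt: %d\n", x > y);                 /* prints 1 */
      printf("signed gt:   %d\n", (int8_t)x > (int8_t)y); /* prints 0 */
      return 0;
    }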
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
index 2a8a107684e42..88ccf29862848 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
@@ -93,9 +93,7 @@ __m128i test_mm_roti_epi64(__m128i a) {
 
 __m128i test_mm_com_epu8(__m128i a, __m128i b) {
   // CIR-LABEL: test_mm_com_epu8
-  // CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
-  // CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
-  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
   // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
 
   // LLVM-LABEL: test_mm_com_epu8
@@ -161,7 +159,62 @@ __m128i test_mm_com_epu64(__m128i a, __m128i b) {
   // OGCG-LABEL: test_mm_com_epu64
   // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
   // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
-  return _mm_com_epu64(a, b, 0);
+  a = _mm_com_epu64(a, b, 0);
+
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(le, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP:.*]] = icmp ule <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG: %[[CMP:.*]] = icmp ule <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  a = _mm_com_epu64(a, b, 1);
+
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(gt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP:.*]] = icmp ugt <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG: %[[CMP:.*]] = icmp ugt <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  a = _mm_com_epu64(a, b, 2);
+
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(ge, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP:.*]] = icmp uge <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG: %[[CMP:.*]] = icmp uge <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  a = _mm_com_epu64(a, b, 3);
+
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(eq, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG: %[[CMP:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  a = _mm_com_epu64(a, b, 4);
+
+  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
+  // CIR: %[[CMP:.*]] = cir.vec.cmp(ne, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+
+  // OGCG: %[[CMP:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
+  return _mm_com_epu64(a, b, 5);
 }
 
 __m128i test_mm_com_epi8(__m128i a, __m128i b) {
@@ -226,7 +279,52 @@ __m128i test_mm_com_epi64(__m128i a, __m128i b) {
   // OGCG-LABEL: test_mm_com_epi64
   // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
   // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
-  return _mm_com_epi64(a, b, 0);
+  a = _mm_com_epi64(a, b, 0);
+
+  // CIR: %[[CMP1:.*]] = cir.vec.cmp(le, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP1:.*]] = icmp sle <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+
+  // OGCG: %[[CMP1:.*]] = icmp sle <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+  a = _mm_com_epi64(a, b, 1);
+
+  // CIR: %[[CMP1:.*]] = cir.vec.cmp(gt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP1:.*]] = icmp sgt <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+
+  // OGCG: %[[CMP1:.*]] = icmp sgt <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+  a = _mm_com_epi64(a, b, 2);
+
+  // CIR: %[[CMP1:.*]] = cir.vec.cmp(ge, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP1:.*]] = icmp sge <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+
+  // OGCG: %[[CMP1:.*]] = icmp sge <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+  a = _mm_com_epi64(a, b, 3);
+
+  // CIR: %[[CMP1:.*]] = cir.vec.cmp(eq, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP1:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+
+  // OGCG: %[[CMP1:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+  a = _mm_com_epi64(a, b, 4);
+
+  // CIR: %[[CMP1:.*]] = cir.vec.cmp(ne, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+  // LLVM: %[[CMP1:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+
+  // OGCG: %[[CMP1:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
+  return _mm_com_epi64(a, b, 5);
 }
 
 __m128i test_mm_com_epi32_false(__m128i a, __m128i b) {